# -*- coding: utf-8 -*-
"""
Created on Fri Dec 04 10:30:07 2015

@author: liliangrong
"""

def get_url_list(pn):
    """Return the article links found on one page of the guide category.

    Downloads ``http://python.jobbole.com/category/guide/page/<pn>/`` and
    extracts the ``href`` value of every anchor tag that carries
    ``target="_blank" class="archive-title"``.

    Args:
        pn: Integer page number substituted into the category URL.

    Returns:
        A list of href strings in the order they appear in the page.
        If the request fails with ``urllib2.URLError`` the error's
        ``code`` and/or ``reason`` are printed and an empty list is
        returned, so callers can always iterate over the result.
    """
    import urllib2
    import re

    url = "http://python.jobbole.com/category/guide/page/%d/" % pn
    try:
        resp = urllib2.urlopen(url)
        try:
            page_source = resp.read()
        finally:
            # Release the connection even if reading the body fails.
            resp.close()
    except urllib2.URLError as e:
        # HTTPError (a URLError subclass) carries ``code``; plain URLError
        # carries ``reason``. Report whichever is available.
        if hasattr(e, 'code'):
            print(e.code)
        if hasattr(e, 'reason'):
            print(e.reason)
        # Return an empty list rather than falling through to None, which
        # would break callers that loop over the result.
        return []

    # re.S lets '.' span newlines, since an anchor tag may be split over
    # several lines in the HTML.
    pattern = re.compile(
        r'<a target="_blank" class="archive-title" href="(.*?)".*?>.*?</a>',
        re.S)
    return pattern.findall(page_source)
    
if __name__ == "__main__":
    # Scrape pages 1..9 of the category and write every article URL found
    # to url_list.txt, one per line, then report the elapsed time.
    import time

    # Wall-clock time: the script is dominated by network waits, which
    # time.clock() (CPU time on Unix) would not account for.
    start = time.time()

    # 'with' guarantees the file is closed even if a page fetch raises.
    with open('url_list.txt', 'w') as f:
        for pn in xrange(1, 10):
            # Treat a failed fetch (no result) as an empty page instead of
            # aborting the whole run.
            for item in get_url_list(pn) or []:
                link = item.strip()
                # Skip empty hrefs so no blank lines end up in the output.
                if link:
                    f.write(link + "\n")

    end = time.time()
    print("run time is %f s" % (end - start))